import numpy as np # linear algebra import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv) import seaborn as sns import matplotlib.pyplot as plt
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
import os
# Print the full path of every file found under the Kaggle input directory.
# os.walk ignores scan errors by default, so when '/kaggle/input' does not
# exist (e.g. when running locally) the loop simply produces no output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# Load Space_Corrected.csv into a DataFrame. The path points at the author's
# local Windows copy of the file, so it must be adjusted on other machines.
csv_path = "C:/Users/kumar/OneDrive/Documents/IV_J_COMPONENT/Space_Corrected.csv"
df = pd.read_csv(csv_path)
# Preview the first five rows.
df.head()
| Unnamed: 0 | Unnamed: 0.1 | Company Name | Location | Datum | Detail | Status Rocket | Rocket | Status Mission | |
|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 0 | SpaceX | LC-39A, Kennedy Space Center, Florida, USA | Fri Aug 07, 2020 05:12 UTC | Falcon 9 Block 5 | Starlink V1 L9 & BlackSky | StatusActive | 50.0 | Success |
| 1 | 1 | 1 | CASC | Site 9401 (SLS-2), Jiuquan Satellite Launch Ce... | Thu Aug 06, 2020 04:01 UTC | Long March 2D | Gaofen-9 04 & Q-SAT | StatusActive | 29.75 | Success |
| 2 | 2 | 2 | SpaceX | Pad A, Boca Chica, Texas, USA | Tue Aug 04, 2020 23:57 UTC | Starship Prototype | 150 Meter Hop | StatusActive | NaN | Success |
| 3 | 3 | 3 | Roscosmos | Site 200/39, Baikonur Cosmodrome, Kazakhstan | Thu Jul 30, 2020 21:25 UTC | Proton-M/Briz-M | Ekspress-80 & Ekspress-103 | StatusActive | 65.0 | Success |
| 4 | 4 | 4 | ULA | SLC-41, Cape Canaveral AFS, Florida, USA | Thu Jul 30, 2020 11:50 UTC | Atlas V 541 | Perseverance | StatusActive | 145.0 | Success |
# Show the column labels. Note that the cost column is named ' Rocket' with a
# leading space, so every later lookup must include that space.
df.columns
Index(['Unnamed: 0', 'Unnamed: 0.1', 'Company Name', 'Location', 'Datum',
'Detail', 'Status Rocket', ' Rocket', 'Status Mission'],
dtype='object')
# In the preview rows the two "Unnamed" columns merely repeat the row number,
# so discard them before any analysis.
df = df.drop(columns=["Unnamed: 0", "Unnamed: 0.1"])
df.head()
| Company Name | Location | Datum | Detail | Status Rocket | Rocket | Status Mission | |
|---|---|---|---|---|---|---|---|
| 0 | SpaceX | LC-39A, Kennedy Space Center, Florida, USA | Fri Aug 07, 2020 05:12 UTC | Falcon 9 Block 5 | Starlink V1 L9 & BlackSky | StatusActive | 50.0 | Success |
| 1 | CASC | Site 9401 (SLS-2), Jiuquan Satellite Launch Ce... | Thu Aug 06, 2020 04:01 UTC | Long March 2D | Gaofen-9 04 & Q-SAT | StatusActive | 29.75 | Success |
| 2 | SpaceX | Pad A, Boca Chica, Texas, USA | Tue Aug 04, 2020 23:57 UTC | Starship Prototype | 150 Meter Hop | StatusActive | NaN | Success |
| 3 | Roscosmos | Site 200/39, Baikonur Cosmodrome, Kazakhstan | Thu Jul 30, 2020 21:25 UTC | Proton-M/Briz-M | Ekspress-80 & Ekspress-103 | StatusActive | 65.0 | Success |
| 4 | ULA | SLC-41, Cape Canaveral AFS, Florida, USA | Thu Jul 30, 2020 11:50 UTC | Atlas V 541 | Perseverance | StatusActive | 145.0 | Success |
# Summarise each column. Every column is object-typed at this point, so the
# summary reports count / unique / top / freq rather than numeric statistics.
df.describe()
| Company Name | Location | Datum | Detail | Status Rocket | Rocket | Status Mission | |
|---|---|---|---|---|---|---|---|
| count | 4324 | 4324 | 4324 | 4324 | 4324 | 964 | 4324 |
| unique | 56 | 137 | 4319 | 4278 | 2 | 56 | 4 |
| top | RVSN USSR | Site 31/6, Baikonur Cosmodrome, Kazakhstan | Tue Jun 26, 1973 | Cosmos-3MRB (65MRB) | BOR-5 Shuttle | StatusRetired | 450.0 | Success |
| freq | 1777 | 235 | 2 | 6 | 3534 | 136 | 3879 |
# Print per-column non-null counts and dtypes; this exposes how sparsely the
# ' Rocket' column is populated compared with the others.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 4324 entries, 0 to 4323 Data columns (total 7 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Company Name 4324 non-null object 1 Location 4324 non-null object 2 Datum 4324 non-null object 3 Detail 4324 non-null object 4 Status Rocket 4324 non-null object 5 Rocket 964 non-null object 6 Status Mission 4324 non-null object dtypes: object(7) memory usage: 236.6+ KB
Rocket Status
# Count launches per rocket status and turn the result into a two-column frame:
#   "index"         -> the status label
#   "Status Rocket" -> the number of launches with that status
# The column names are set explicitly because a bare
# value_counts().reset_index() produces different names on pandas < 2.0
# ("index", "Status Rocket") and pandas >= 2.0 ("Status Rocket", "count");
# the plotting code below relies on the former.
ds = (
    df["Status Rocket"]
    .value_counts()
    .rename_axis("index")
    .reset_index(name="Status Rocket")
)
ds
| index | Status Rocket | |
|---|---|---|
| 0 | StatusRetired | 3534 |
| 1 | StatusActive | 790 |
# Pie chart of the rocket-status counts: one slice per status label, sized by
# the number of launches with that status.
fig = px.pie(
    data_frame=ds,
    names="index",
    values="Status Rocket",
    title="Rocket Status",
)
fig.show()
# Count launches per mission status and keep the three most frequent
# outcomes (value_counts sorts by descending frequency).
# Columns are named explicitly ("index" = status label, "Status Mission" =
# count) because a bare value_counts().reset_index() names them differently
# on pandas < 2.0 and pandas >= 2.0, and the bar chart below expects these.
ds = (
    df["Status Mission"]
    .value_counts()
    .rename_axis("index")
    .reset_index(name="Status Mission")
    .head(3)
)
ds
| index | Status Mission | |
|---|---|---|
| 0 | Success | 3879 |
| 1 | Failure | 339 |
| 2 | Partial Failure | 102 |
# Bar chart of the mission-status counts: one bar per status label.
fig = px.bar(
    data_frame=ds,
    x="index",
    y="Status Mission",
    title="Mission Status",
)
fig.show()
Rocket cost distribution by rocket status
# Number of launches in the full frame that have no cost recorded.
df[" Rocket"].isna().sum()
3360
# Keep only the launches that have a cost recorded.
# The explicit .copy() makes df_ an independent frame: it is modified further
# down (the cost column is cleaned and converted), and assigning into a frame
# that pandas still tracks as derived from df raises SettingWithCopyWarning.
df_ = df.dropna(subset=[" Rocket"]).copy()
len(df_)
964
# Sanity check: the filtered frame must contain no missing cost values.
df_[" Rocket"].isna().sum()
0
# Preview the cost column of the filtered frame. Its dtype is still object
# (the values are text), so it has to be converted before numeric use.
df_.loc[:, " Rocket"]
0 50.0
1 29.75
3 65.0
4 145.0
5 64.68
...
3855 59.0
3971 63.23
3993 63.23
4000 63.23
4020 63.23
Name: Rocket, Length: 964, dtype: object
# Convert the textual cost column to float64.
#  * astype(str) first, so the step also works when the column already holds
#    floats (e.g. when this cell is re-run) instead of failing on `.str`.
#  * Commas are stripped before parsing (presumably thousands separators --
#    verify against the raw CSV).
#  * Anything that ends up missing after conversion is set to 0.0.
rocket_cost = (
    df_[" Rocket"]
    .astype(str)
    .str.replace(",", "", regex=False)
    .astype(np.float64)
    .fillna(0.0)
)
# assign() returns a new frame with the column replaced. Unlike writing through
# `df_.loc[:, " Rocket"] = ...`, this guarantees the column dtype becomes
# float64 and does not trigger SettingWithCopyWarning.
df_ = df_.assign(**{" Rocket": rocket_cost})
C:\Users\kumar\anaconda3\lib\site-packages\pandas\core\indexing.py:1843: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
# Restrict the plot to launches whose cost is below 1000 so that the few very
# large values do not stretch the x-axis.
df_d = df_.loc[df_[" Rocket"] < 1000]

# Histogram of launch cost, with bars coloured by rocket status.
plt.figure(figsize=(22, 6))
sns.histplot(x=" Rocket", hue="Status Rocket", data=df_d)
plt.show()
Total money spent by each company
# Re-display the first rows of the original frame `df` (not the filtered
# `df_`), which is why missing cost values still appear here.
df.head()
| Company Name | Location | Datum | Detail | Status Rocket | Rocket | Status Mission | |
|---|---|---|---|---|---|---|---|
| 0 | SpaceX | LC-39A, Kennedy Space Center, Florida, USA | Fri Aug 07, 2020 05:12 UTC | Falcon 9 Block 5 | Starlink V1 L9 & BlackSky | StatusActive | 50.0 | Success |
| 1 | CASC | Site 9401 (SLS-2), Jiuquan Satellite Launch Ce... | Thu Aug 06, 2020 04:01 UTC | Long March 2D | Gaofen-9 04 & Q-SAT | StatusActive | 29.75 | Success |
| 2 | SpaceX | Pad A, Boca Chica, Texas, USA | Tue Aug 04, 2020 23:57 UTC | Starship Prototype | 150 Meter Hop | StatusActive | NaN | Success |
| 3 | Roscosmos | Site 200/39, Baikonur Cosmodrome, Kazakhstan | Thu Jul 30, 2020 21:25 UTC | Proton-M/Briz-M | Ekspress-80 & Ekspress-103 | StatusActive | 65.0 | Success |
| 4 | ULA | SLC-41, Cape Canaveral AFS, Florida, USA | Thu Jul 30, 2020 11:50 UTC | Atlas V 541 | Perseverance | StatusActive | 145.0 | Success |
# Total recorded cost per company, computed on the filtered frame, keeping
# only companies whose total is strictly positive.
df_money = (
    df_.groupby(["Company Name"])[" Rocket"]
    .sum()
    .reset_index()
    .loc[lambda totals: totals[" Rocket"] > 0]
)
df_money.head()
| Company Name | Rocket | |
|---|---|---|
| 0 | Arianespace | 16345.00 |
| 1 | Boeing | 1241.00 |
| 2 | CASC | 6340.26 |
| 3 | EER | 20.00 |
| 4 | ESA | 37.00 |
# Rank companies by total cost, largest first, and keep the top 15.
df_money_ = df_money.sort_values(" Rocket", ascending=False).head(15)
df_money_.head()
| Company Name | Rocket | |
|---|---|---|
| 14 | NASA | 76280.00 |
| 0 | Arianespace | 16345.00 |
| 21 | ULA | 14798.00 |
| 16 | RVSN USSR | 10000.00 |
| 2 | CASC | 6340.26 |
# Bar chart of the 15 largest per-company cost totals.
fig = px.bar(
    data_frame=df_money_,
    x="Company Name",
    y=" Rocket",
    title="Total Spent Money for Each Company",
)
fig.show()